In [1]:
import altair as alt
import pandas as pd
import numpy as np
import os
from toolz.curried import pipe
from vega_datasets import data
from altair import datum

# Altair's default row limit is 5000; lift it so the full data set can be charted.
# See: https://altair-viz.github.io/user_guide/data_transformers.html
alt.data_transformers.disable_max_rows()

# Read the cleaned survey data and store the study year as an integer.
df = pd.read_csv(
    "../../data/processed/cleaned_financial_data.csv",
    low_memory=True,
).astype({'Year_of_Study': int})

Visualization 1 - Income Ranges and Vote Preference Map

In [2]:
# Columns used by the income / vote views; rows with any missing value are dropped.
# .copy() gives df_income its own data, so adding 'Income_Level' below is not an
# assignment on a slice of df (avoids pandas' SettingWithCopyWarning).
df_income = df[[
    "Year_of_Study",
    "Voting_Preference",
    "State_Code_FIPS",
    "Income_Group",
    "Unemployment_Past_Year",
    "Unemployment_Next_Year"
]].dropna().copy()

# Ordinal rank of each income bracket, lowest to highest.
income_mapping = {
    'Low Income': 1,
    'Middle Income': 2,
    'Upper Middle Income': 3,
    'High Income': 4
}

# Build a numeric 'Income_Level' column ('Income_Group' itself is left untouched):
# the bracket rank as int64, rescaled linearly so the lowest bracket is 0 and the
# highest is 100. The divisor is taken from the mapping instead of a literal 3.
income_rank = df_income['Income_Group'].map(income_mapping).astype('int64')
df_income['Income_Level'] = (income_rank - 1) / (max(income_mapping.values()) - 1) * 100

# Step 1: keep only Democrat / Republican respondents and count them
# per state, study year and party.
is_major_party = df_income['Voting_Preference'].isin(['Democrat', 'Republican'])
vote_counts = (
    df_income.loc[is_major_party]
    .groupby(['State_Code_FIPS', 'Year_of_Study', 'Voting_Preference'])
    .size()
    .reset_index(name='Count')
)

# Step 2: one row per (state, year), one count column per party.
vote_pivot = vote_counts.pivot(
    index=['State_Code_FIPS', 'Year_of_Study'],
    columns='Voting_Preference',
    values='Count',
)

# Step 3: Create a new column 'Dominant_Party' by comparing the two counts
def determine_party(row):
    """Return the party with more respondents in one state-year row.

    Parameters
    ----------
    row : mapping-like (e.g. a ``pandas.Series``)
        Holds the respondent counts under the keys ``'Democrat'`` and
        ``'Republican'``. A key that is absent, or whose value is NaN (what
        ``DataFrame.pivot`` leaves behind when a party has no respondents for
        that state and year), is counted as zero.

    Returns
    -------
    str
        ``'Democrat'``, ``'Republican'``, or ``'Neutral'`` when both counts
        are equal.
    """
    def _count(party):
        value = row.get(party, 0)
        # Any comparison with NaN is False, so without this a state where only
        # one party has respondents would fall through to 'Neutral'.
        return 0 if pd.isna(value) else value

    democrat = _count('Democrat')
    republican = _count('Republican')
    if democrat > republican:
        return 'Democrat'
    if republican > democrat:
        return 'Republican'
    return 'Neutral'

# Label every (state, year) row with its leading party, then turn the
# index levels back into ordinary columns.
vote_pivot = vote_pivot.assign(
    Dominant_Party=vote_pivot.apply(determine_party, axis=1)
).reset_index()

dominant_party_df = vote_pivot.loc[:, ['State_Code_FIPS', 'Year_of_Study', 'Dominant_Party']]

# Mean of every numeric column per state and year, with the leading party
# attached through a left join so that every averaged row is kept.
df_income_avg = (
    df_income
    .groupby(['State_Code_FIPS', 'Year_of_Study'], as_index=False)
    .mean(numeric_only=True)
    .merge(
        dominant_party_df,
        on=['State_Code_FIPS', 'Year_of_Study'],
        how='left',
    )
)

# Long format: one row per state, year and leading party, carrying the
# averaged 'Income_Level' as Attribute / Value.
df_melt_income = df_income_avg.melt(
    id_vars=['State_Code_FIPS', 'Year_of_Study', 'Dominant_Party'],
    value_vars=['Income_Level'],
    var_name='Attribute',
    value_name='Value',
)

# Back to wide format (one row per state) so the map can join it with a lookup.
df_wide_income = (
    df_melt_income
    .pivot(
        index='State_Code_FIPS',
        columns=['Attribute', 'Year_of_Study', 'Dominant_Party'],
        values='Value',
    )
    .reset_index()
)

# Flatten the three-level column labels:
# ('Income_Level', 2020, 'Democrat') -> 'Income_Level_2020_Democrat'.
# The index column restored by reset_index() has empty lower levels and keeps its plain name.
df_wide_income.columns = [
    attribute if year == '' else f"{attribute}_{year}_{party}"
    for attribute, year, party in df_wide_income.columns.to_flat_index()
]

# Every column name, as consumed by transform_lookup / transform_fold.
all_wide_cols = list(df_wide_income.columns)

# Slider from 2004 to 2020 in steps of 4 years (election cycles).
slider = alt.binding_range(
    min=2004, max=2020, step=4, name='Year: '
)

# Year currently chosen on the slider; starts at 2020.
select_year = alt.selection_point(
    fields=['Year_of_Study'],
    bind=slider,
    value=2020,
)

# State picked on the map, identified by FIPS code; starts on FIPS 1.
# empty=False: an empty selection matches nothing, so no state is highlighted.
# (Altair 5 expects a boolean here; the old 'none' / 'all' strings are deprecated.)
highlight_state = alt.selection_point(
    fields=['State_Code_FIPS'],
    value=1,
    empty=False,
)

# Checkbox that switches the party colouring of the map on and off; ticked by default.
checkbox_party = alt.binding_checkbox(name='Show Winning Party Color')
show_party_color = alt.selection_point(
    name="party_toggle",
    fields=['show'],
    bind=checkbox_party,
    value=[{'show': True}],
)

# Legend-bound selection on Dominant_Party; toggle=False keeps it to one value at a time.
select_point = alt.selection_point(
    fields=['Dominant_Party'],
    bind='legend',
    toggle=False,
)

# Cross-filter selections for the two bar charts; while empty they match every row.
party_selection = alt.selection_point(fields=['Voting_Preference'],
                                      empty=True)

income_selection = alt.selection_point(fields=['Income_Group'],
                                       empty=True)

# US state outlines (TopoJSON) used as the base map.
states = alt.topo_feature(data.us_10m.url, 'states')

# Outline: the highlighted state gets a thick, fully opaque gray border.
map_stroke = alt.condition(highlight_state, alt.value('gray'), alt.value('white'))
map_stroke_width = alt.condition(highlight_state, alt.value(5), alt.value(2))
map_stroke_opacity = alt.condition(highlight_state, alt.value(1), alt.value(0.1))

# Fill: winning-party colour while the checkbox selection holds, plain grey otherwise.
map_fill = alt.condition(
    show_party_color,
    alt.Color(
        'Dominant_Party:N',
        title='Winning Party',
        scale=alt.Scale(
            domain=['Democrat', 'Republican', 'Neutral'],
            range=['#1f77b4', '#d62728', '#999999'],  # blue, red, gray
        ),
    ),
    alt.value('grey'),
)

# Opacity follows the averaged income level.
map_opacity = alt.Opacity(
    'Value:Q',
    title='Avg Income Level',
    scale=alt.Scale(domain=[20, 60]),
    legend=None,
)

map_tooltip = [
    alt.Tooltip('id:O', title='State FIPS'),
    alt.Tooltip('Value:Q', title='Income Level'),
    alt.Tooltip('Year_of_Study:N', title='Year'),
    alt.Tooltip('Dominant_Party:N', title='Voting Preference'),
]

chart1_income = (
    alt.Chart(states)
    .mark_geoshape()
    # Attach the wide income table to each state shape via its FIPS id.
    .transform_lookup(
        lookup='id',
        from_=alt.LookupData(df_wide_income, 'State_Code_FIPS', all_wide_cols),
    )
    # Unpivot the wide columns into (AttributeYear, Value) pairs.
    .transform_fold(
        fold=all_wide_cols,
        as_=['AttributeYear', 'Value'],
    )
    # Column names look like 'Income_Level_2020_Democrat': splitting on '_'
    # puts the year at index 2 and the party at index 3.
    .transform_calculate(
        Year_of_Study="parseInt(split(datum.AttributeYear, '_')[2])",
        Attribute="split(datum.AttributeYear, '_')[0]",
        Dominant_Party="split(datum.AttributeYear, '_')[3]",
        show="true",
    )
    # Keep only the year chosen on the slider.
    .transform_filter(select_year)
    .encode(
        stroke=map_stroke,
        strokeWidth=map_stroke_width,
        strokeOpacity=map_stroke_opacity,
        color=map_fill,
        opacity=map_opacity,
        tooltip=map_tooltip,
    )
    .project(type='albersUsa')
    .properties(
        width=800,
        height=800,
        title='Average Income and Vote Preference by US State (2004–2020)',
    )
    .add_params(select_year, highlight_state, show_party_color)
)

# Bars keep their party colour while they match party_selection, light gray otherwise.
vote_bar_color = alt.condition(
    party_selection,
    alt.Color(
        'Voting_Preference:N',
        title='Party',
        scale=alt.Scale(domain=['Democrat', 'Republican'], range=['#1f77b4', '#d62728']),
    ),
    alt.value('lightgray'),
)

# Respondent count per party, restricted to the selected year, state and income group.
vote_bar_chart = (
    alt.Chart(df_income)
    .transform_filter(select_year)
    .transform_filter(highlight_state)
    .transform_filter(income_selection)
    .mark_bar()
    .encode(
        x=alt.X('Voting_Preference:N', title='Party', axis=alt.Axis(labelAngle=0)),
        y=alt.Y('count():Q', title='Number of Votes'),
        color=vote_bar_color,
        tooltip=[
            alt.Tooltip('Voting_Preference:N', title='Party'),
            alt.Tooltip('count():Q', title='Votes'),
        ],
    )
    .properties(
        width=100,
        height=250,
        title='Vote Count in Selected State',
    )
    .add_params(party_selection)
)

# Income brackets in ascending order; used for both the axis sort and the colour domain.
income_group_order = ['Low Income', 'Middle Income', 'Upper Middle Income', 'High Income']

# Bars keep their bracket colour while they match income_selection, light gray otherwise.
income_bar_color = alt.condition(
    income_selection,
    alt.Color(
        'Income_Group:N',
        title='Income Group',
        scale=alt.Scale(
            domain=income_group_order,
            range=['#e66101', '#fdb863', '#b2df8a', '#1a9641'],
        ),
    ),
    alt.value('lightgray'),
)

# Respondent count per income bracket, restricted to the selected year, state and party.
income_bar_chart = (
    alt.Chart(df_income)
    .transform_filter(select_year)
    .transform_filter(highlight_state)
    .transform_filter(party_selection)
    .mark_bar()
    .encode(
        x=alt.X(
            'Income_Group:N',
            title='Income Group',
            sort=income_group_order,
            axis=alt.Axis(labelAngle=-30),
        ),
        y=alt.Y('count():Q', title='Count of Respondents'),
        color=income_bar_color,
        tooltip=[
            alt.Tooltip('Income_Group:N', title='Income Group'),
            alt.Tooltip('count():Q', title='Respondent Count'),
        ],
    )
    .properties(
        width=100,
        height=250,
        title='Income Group Distribution',
    )
    .add_params(income_selection)
)

Visualization 2 - Unemployment Perception vs. Voting Preferences Bubble Matrix

In [3]:
# One cell of the bubble matrix = year x state x past perception x future perception.
perception_keys = [
    'Year_of_Study',
    'State_Code_FIPS',
    'Unemployment_Past_Year',
    'Unemployment_Next_Year',
]

# Respondents per cell and party.
vote_aggregated = (
    df_income
    .groupby(perception_keys + ['Voting_Preference'])
    .size()
    .reset_index(name='count')
)

# Respondents per cell across all parties, aligned row by row with vote_aggregated.
total_counts = vote_aggregated.groupby(perception_keys)["count"].transform("sum")

# Each party's share of its cell, in percent.
vote_aggregated["percentage"] = vote_aggregated["count"] / total_counts * 100

# Display order of the perception categories on both axes.
past_une_order = ["Better", "Same", "Worse"]
next_une_order = list(past_une_order)

# Party selection driven by clicks on a chart legend.
# empty=True: while nothing is selected every party matches. Altair 5 expects a
# boolean here; the former "all" string is deprecated.
party_selector = alt.selection_point(
    name="bubble_party",
    fields=["Voting_Preference"],
    bind="legend",
    empty=True
)

# Party colour for bubbles matching party_selection, light gray otherwise.
bubble_color = alt.condition(
    party_selection,
    alt.Color(
        'Voting_Preference:N',
        title="Party",
        scale=alt.Scale(domain=["Democrat", "Republican"], range=["blue", "red"]),
    ),
    alt.value('lightgray'),
)

# Bubble matrix: past perception (x) against future perception (y), one row of
# panels per study year, bubble area showing each party's share of the cell.
chart2 = (
    alt.Chart(vote_aggregated)
    .mark_circle(opacity=0.5)
    .transform_filter(party_selection)
    .transform_filter(highlight_state)
    .encode(
        x=alt.X(
            'Unemployment_Past_Year:N',
            sort=past_une_order,
            title=None,
            axis=alt.Axis(labelAngle=-30),
        ),
        y=alt.Y("Unemployment_Next_Year:N", sort=next_une_order, title=None),
        color=bubble_color,
        size=alt.Size(
            "percentage:Q",
            title='Percentage',
            scale=alt.Scale(range=[5, 500]),
        ),
        row=alt.Row('Year_of_Study', title=None, header=alt.Header(labelOrient="top")),
        tooltip=["Voting_Preference", "percentage"],
    )
    .properties(
        width=90,
        height=90,
        title=["Voting Preference vs.", "Past and Future", "Unemployment Perception", "(2000–2020)"],
    )
)


# The bubble matrix has no axis titles of its own (title=None), so two
# zero-sized text charts stand in as shared axis captions.

# Rotated caption placed to the left of the matrix.
y_label = (
    alt.Chart(pd.DataFrame({'text': ['Future Unemployment Perception']}))
    .mark_text(
        angle=270,
        align='center',
        baseline='middle',
        fontWeight='bold',
        fontSize=16,
        dx=-270,
        dy=0,
    )
    .encode(text='text:N')
    .properties(width=0, height=0)
)

# Caption placed underneath the matrix.
x_label = (
    alt.Chart(pd.DataFrame({'text': ['Past Unemployment Perception']}))
    .mark_text(
        align='center',
        baseline='bottom',
        fontWeight='bold',
        fontSize=16,
    )
    .encode(text='text:N')
    .properties(width=0, height=0)
)

# Layout: [y caption] next to [matrix stacked above x caption].
combined_chart2 = alt.hconcat(
    y_label,
    alt.vconcat(chart2, x_label),
)

Visualization 3 - Shifts in Economic Attitudes and Party Alignment Over Election Cycles

In [4]:
# Feeling-thermometer columns analysed in this section.
thermometer_vars = [
    "Thermometer_Business",
    "Thermometer_Labor_Union",
    "Thermometer_Middle_Class",
    "Thermometer_Welfare",
    "Thermometer_Poor"
]

# Identifier columns plus every thermometer column; rows with any missing value
# are dropped. The column list reuses thermometer_vars so the two cannot drift
# apart, and .copy() gives df_thermometer its own data so the column assignments
# made on it afterwards do not trigger pandas' SettingWithCopyWarning.
df_thermometer = df[[
    "Year_of_Study",
    "State_Code_FIPS",
    "Voting_Preference",
    *thermometer_vars,
]].dropna().copy()

# Display label for each thermometer column name.
# "Thermometer_Poor" used to be listed twice; a dict literal silently keeps only
# one entry per key, so the repeated line has been removed.
attribute_label_map = {
    "Thermometer_Unions": "Unions",
    "Thermometer_Poor": "The Poor",
    "Thermometer_Business": "Big Business",
    "Thermometer_Labor_Union": "Labor Union",
    "Thermometer_Middle_Class": "Middle Class",
    "Thermometer_Welfare": "Welfare"
}

# Lookup from two-character, zero-padded FIPS code strings to two-letter
# abbreviations (51 entries, including '11' -> 'DC').
# Keys are strings, so numeric codes must be zero-padded to width 2 before lookup.
fips_to_state = {
    '01': 'AL', '02': 'AK', '04': 'AZ', '05': 'AR', '06': 'CA',
    '08': 'CO', '09': 'CT', '10': 'DE', '11': 'DC', '12': 'FL',
    '13': 'GA', '15': 'HI', '16': 'ID', '17': 'IL', '18': 'IN',
    '19': 'IA', '20': 'KS', '21': 'KY', '22': 'LA', '23': 'ME',
    '24': 'MD', '25': 'MA', '26': 'MI', '27': 'MN', '28': 'MS',
    '29': 'MO', '30': 'MT', '31': 'NE', '32': 'NV', '33': 'NH',
    '34': 'NJ', '35': 'NM', '36': 'NY', '37': 'NC', '38': 'ND',
    '39': 'OH', '40': 'OK', '41': 'OR', '42': 'PA', '44': 'RI',
    '45': 'SC', '46': 'SD', '47': 'TN', '48': 'TX', '49': 'UT',
    '50': 'VT', '51': 'VA', '53': 'WA', '54': 'WV', '55': 'WI',
    '56': 'WY'
}

# Normalise the FIPS code to a zero-padded two-character string (1 -> '01'),
# which is the key format of fips_to_state, then look up the abbreviation.
fips_padded = df_thermometer['State_Code_FIPS'].astype(int).astype(str).str.zfill(2)
df_thermometer['State_Code_FIPS'] = fips_padded
df_thermometer['State_Abbrev'] = fips_padded.map(fips_to_state)

# Long format: one row per respondent and thermometer, plus a display label
# for the thermometer column name.
df_melted = (
    df_thermometer
    .melt(
        id_vars=["Year_of_Study", "State_Abbrev", "Voting_Preference"],
        value_vars=thermometer_vars,
        var_name="Attribute",
        value_name="Score",
    )
    .assign(AttributeLabel=lambda frame: frame["Attribute"].map(attribute_label_map))
)

# Dropdown listing every thermometer label present in the melted data.
dropdown = alt.binding_select(
    options=df_melted["AttributeLabel"].unique().tolist(),
    name="Select Thermometer: "
)

# Thermometer currently chosen in the dropdown; starts on "Big Business".
attitude_selector = alt.selection_point(
    fields=["AttributeLabel"],
    bind=dropdown,
    value="Big Business"
)

# Point selection on State_Abbrev. empty=True: while nothing is selected every
# state matches. Altair 5 expects a boolean here; the former "all" string is
# deprecated.
state_selector = alt.selection_point(
    name="density_party",
    fields=["State_Abbrev"],
    empty=True
)

# Horizontal interval (brush) selection, resolved globally.
at_brush = alt.selection_interval(
    encodings=["x"],
    name="score_range",
    resolve='global'
)

# Bars matching state_selector are shaded by their mean score; the rest turn light gray.
state_bar_color = alt.condition(
    state_selector,
    alt.Color('mean(Score):Q', scale=alt.Scale(scheme='oranges')),
    alt.value('lightgray'),
)

# Mean thermometer score per state, sorted from highest to lowest, for the
# chosen thermometer, party selection and brushed score range.
thermo_avg_bar = (
    alt.Chart(df_melted)
    .transform_filter(attitude_selector)
    .transform_filter(party_selector)
    .transform_filter(at_brush)
    .mark_bar()
    .encode(
        x=alt.X('State_Abbrev:N', sort='-y', title='State'),
        y=alt.Y('mean(Score):Q', title='Average Score'),
        color=state_bar_color,
        tooltip=[
            alt.Tooltip('State_Abbrev:N', title='State'),
            alt.Tooltip('mean(Score):Q', title='Average Score', format='.1f'),
        ],
    )
    .add_params(state_selector)
    .properties(
        width=1550,
        height=100,
        title='Average Thermometer Score by State',
    )
)

# Party colour for areas matching party_selector, light gray otherwise.
density_color = alt.condition(
    party_selector,
    alt.Color(
        'Voting_Preference:N',
        title='Party (Clickable)',
        scale=alt.Scale(domain=["Democrat", "Republican"], range=["blue", "red"]),
    ),
    alt.value('lightgray'),
)

# Score density per party, one panel per study year, for the chosen
# thermometer and the states matching state_selector.
thermo_density_chart = (
    alt.Chart(df_melted)
    .transform_filter(attitude_selector)
    .transform_filter(state_selector)
    # Estimate one density curve per party and year.
    .transform_density(
        "Score",
        groupby=["Voting_Preference", 'Year_of_Study'],
        as_=["Score", "Density"],
    )
    .mark_area(opacity=0.4)
    .encode(
        x=alt.X("Score:Q", scale=alt.Scale(domain=[0, 100]), title="Thermometer Score"),
        # stack=None overlays the party areas instead of stacking them.
        y=alt.Y("Density:Q", title="Density", stack=None),
        color=density_color,
        column=alt.Column('Year_of_Study:O', title='Year'),
        tooltip=[
            alt.Tooltip("Voting_Preference:N", title="Party"),
            alt.Tooltip("Score:Q", format=".1f"),
            alt.Tooltip("Density:Q", format=".3f"),
        ],
    )
    .add_params(attitude_selector, party_selector, at_brush)
    .properties(
        width=500,
        height=150,
        title='Distribution of Scores by Party in Selected State',
    )
    .resolve_scale(x='shared')
)
# combined_charts
In [7]:
# Right-hand side panel: the two bar charts stacked, each with its own colour scale.
state_detail_bars = alt.vconcat(
    vote_bar_chart,
    income_bar_chart,
).resolve_scale(color='independent')

# Top row: map, side panel and bubble matrix next to each other.
# NOTE(review): chart1_income already registers highlight_state in its own
# add_params(...) call, so the add_params here looks redundant - confirm before removing.
dashboard_top_row = alt.hconcat(
    chart1_income.add_params(highlight_state),
    state_detail_bars,
    combined_chart2,
).resolve_scale(
    color='independent',
    size='independent',
)

# Full dashboard: top row, then the per-state averages, then the density panels.
combined_charts = alt.vconcat(
    dashboard_top_row,
    thermo_avg_bar,
    thermo_density_chart,
).resolve_scale(
    color='independent'
)

def apply_default_config(chart):
    """Apply the notebook's shared look to ``chart`` and return the result.

    Calls, in order, the chart's ``configure_view``, ``configure_title``,
    ``configure_axis``, ``configure_header``, ``configure_legend`` and
    ``configure_concat`` methods, feeding each call the object returned by the
    previous one, and returns what the last call returns.
    """
    # No border around the individual views.
    styled = chart.configure_view(strokeWidth=0)
    styled = styled.configure_title(fontSize=18)
    styled = styled.configure_axis(labelFontSize=14, titleFontSize=16)
    styled = styled.configure_header(labelFontSize=15, titleFontSize=17)
    styled = styled.configure_legend(labelFontSize=14, titleFontSize=16)
    # No spacing between concatenated charts.
    styled = styled.configure_concat(spacing=0)
    return styled

# Last expression of the cell: its value (the configured dashboard) becomes the cell output.
apply_default_config(combined_charts)
Out[7]:
In [ ]: